import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource
from bokeh.models import NumeralTickFormatter
from bokeh.io import output_notebook
output_notebook()
import plotly.express as px
import plotly.io as pio
pio.renderers.default='notebook'
import seaborn as sns
#Setup names for our family.
females = ["Sarah"]
males = ["Jason","Leon","Conrad"]
#Create some lists to select or reformat data later
count_cols = [] # "1996 Count", ... "2021 Count" ** Used to drop/rename columns
rank_cols = [] # "1996 Rank", ... "2021 Rank" ** Used to drop/rename columns
years = [] # "1996", ... "2021" ** Used to wrangle data and rename columns
for f in range(1996,2022):
count_cols.append(str(f) + ' Count')
rank_cols.append(str(f) + ' Rank')
years.append(str(f))
#Import ONS data
df1 = pd.read_excel("babynames1996to2021.xlsx","1",skiprows =7) # Boys names in workbook 1
df2 = pd.read_excel( 'babynames1996to2021.xlsx',"2",skiprows =7) # Girls names in workbook 2
df1 = df1.set_index("Name")
df2 = df2.set_index("Name")
#Replace NaN with 0
df1 = df1.replace("[x]",0)
df2 = df2.replace("[x]",0)
#Filter names from ONS data
Girls = df2.loc[females]
Boys = df1.loc[males]
df_family = pd.concat([Girls, Boys], axis=0)
#Drop rank and count columns
df_family_counts = df_family.drop(columns=rank_cols)
df_family_ranks = df_family.drop(columns=count_cols)
df_family_counts.columns = df_family_counts.columns.str.replace(" Count","")
df_family_ranks.columns = df_family_ranks.columns.str.replace(" Rank","")
df_family_counts = pd.pivot_table(df_family_counts, values = years, columns=["Name"])
df_family_counts.index.name = "Year"
df_family_ranks = pd.pivot_table(df_family_ranks, values = years, columns=["Name"])
df_family_ranks.index.name = "Year"
df_family_ranks.tail(14)
| Name | Conrad | Jason | Leon | Sarah |
|---|---|---|---|---|
| Year | ||||
| 2008 | 489 | 139 | 62 | 72 |
| 2009 | 473 | 145 | 60 | 79 |
| 2010 | 536 | 161 | 60 | 83 |
| 2011 | 576 | 159 | 62 | 90 |
| 2012 | 734 | 165 | 72 | 95 |
| 2013 | 611 | 155 | 75 | 96 |
| 2014 | 714 | 162 | 85 | 95 |
| 2015 | 779 | 163 | 85 | 96 |
| 2016 | 870 | 170 | 92 | 96 |
| 2017 | 861 | 179 | 98 | 103 |
| 2018 | 851 | 174 | 97 | 103 |
| 2019 | 1152 | 175 | 93 | 107 |
| 2020 | 921 | 213 | 93 | 115 |
| 2021 | 1448 | 210 | 101 | 125 |
df_family_counts.tail(14)
| Name | Conrad | Jason | Leon | Sarah |
|---|---|---|---|---|
| Year | ||||
| 2008 | 71 | 404 | 1138 | 930 |
| 2009 | 76 | 406 | 1155 | 793 |
| 2010 | 64 | 344 | 1204 | 722 |
| 2011 | 61 | 360 | 1069 | 663 |
| 2012 | 45 | 359 | 918 | 592 |
| 2013 | 56 | 372 | 837 | 574 |
| 2014 | 45 | 353 | 765 | 601 |
| 2015 | 41 | 355 | 795 | 581 |
| 2016 | 36 | 340 | 737 | 572 |
| 2017 | 36 | 314 | 669 | 530 |
| 2018 | 36 | 311 | 655 | 503 |
| 2019 | 23 | 302 | 662 | 479 |
| 2020 | 31 | 240 | 620 | 422 |
| 2021 | 17 | 251 | 590 | 403 |
# create a new plot with a title and axis labels
source = ColumnDataSource(df_family_counts)
p = figure(title="UK baby count for selected names", x_axis_label="", y_axis_label="Babies",x_range=years, width=900, height=480)
# add a line renderer with legend and line thickness
p.line(x = "Year", y = "Jason", legend_label="Jason", line_width=2,source=source)
p.line(x = "Year", y = "Sarah", legend_label="Sarah", line_color="red",line_width=2,source=source)
p.line(x = "Year", y = "Leon", legend_label="Leon", line_color="yellow",line_width=2,source=source)
p.line(x = "Year", y = "Conrad", legend_label="Conrad", line_color="darkgrey",line_width=2,source=source)
# show the results
show(p)
# create a new plot with a title and axis labels
source = ColumnDataSource(df_family_ranks)
p = figure(title="UK baby name rankings", x_axis_label="Year", y_axis_label="Name UK rank",x_range=years, width=900, height=480)
# add a line renderer with legend and line thickness
p.line(x = "Year", y = "Jason", legend_label="Jason", line_width=2,source=source)
p.line(x = "Year", y = "Sarah", legend_label="Sarah", line_color="red",line_width=2,source=source)
p.line(x = "Year", y = "Leon", legend_label="Leon", line_color="yellow",line_width=2,source=source)
p.line(x = "Year", y = "Conrad", legend_label="Conrad", line_color="darkgrey",line_width=2,source=source)
# show the results
show(p)
df1_totals = df1.drop(columns=rank_cols).sum()
df2_totals = df2.drop(columns=rank_cols).sum()
df1_totals = df1_totals.reset_index()
df2_totals = df2_totals.reset_index()
df_totals = pd.concat([df1_totals, df2_totals], axis=0)
df_totals = df_totals.replace(count_cols,years)
#df_totals = df1_totals + df2_totals
df_totals.columns=["Years","Total"]
df_totals = df_totals.groupby("Years").sum()
df_totals.plot(title="Total UK baby names",figsize=(11,6),grid=True)
<AxesSubplot:title={'center':'Total UK baby names'}, xlabel='Years'>
df_family_counts.plot(title="UK baby count for selected names",figsize=(11,6),grid=True)
<AxesSubplot:title={'center':'UK baby count for selected names'}, xlabel='Year'>
fig, ax = plt.subplots(figsize=(15, 8))
sns.lineplot(df_family_counts).set(title="UK baby count for selected names")
[Text(0.5, 1.0, 'UK baby count for selected names')]
px.line(df_totals, title = "Total UK baby names")
px.line(df_family_counts, title="UK baby count for selected names")